# coding:utf-8
'''
Scrape the novel "三国机密" (The Secret of the Three Kingdoms) from 52shuku.
Entry page: https://www.52shuku8.com/wenxue/1115/16419.html
'''
import urllib2
from bs4 import BeautifulSoup

import sys
# Python 2 hack: `sys.setdefaultencoding` is deleted by site.py at startup,
# so reload(sys) is needed to get it back. Setting the default to UTF-8 makes
# implicit str<->unicode conversions (e.g. when writing scraped text) use
# UTF-8 instead of ASCII, avoiding UnicodeDecodeError on Chinese text.
reload(sys)
sys.setdefaultencoding('utf8')

def Main():
    url_list = ParseMainPage()
    for url in url_list:
        print "crawler url=" + url
        detail_page = OpenPage(url)
        data = ParseDetailPage(detail_page)
        WriteDataToFile("./result2.txt", data)
    print "crawler done!"

def OpenPage(url):
    '''Fetch `url` over HTTP and return the raw response body as a byte string.

    Fix: the urlopen handle was never closed, leaking a socket per page
    across the 200+ page crawl; close it in a try/finally.
    '''
    headers = {}  # no custom headers needed for this site so far
    req = urllib2.Request(url, headers=headers)
    f = urllib2.urlopen(req)
    try:
        data = f.read()
    finally:
        f.close()
    # If the page were GBK-encoded, transcode to UTF-8 ignoring bad bytes:
    # data = data.decode('GBK', errors="ignore").encode('UTF-8')
    return data

def ParseMainPage(last_page=217):
    '''Build the ordered list of chapter-page URLs for the book.

    Page 1 of the book index has no numeric suffix; pages 2..last_page use
    the "16419_<n>.html" pattern. The bound was previously hard-coded
    (range(2, 218), i.e. 217 pages); it is now a parameter with the same
    default so existing callers are unaffected.

    Args:
        last_page: number of the final index page (default 217).
    Returns:
        list of URL strings, one per page, in page order.
    '''
    url_list = ['https://www.52shuku8.com/wenxue/1115/16419.html']
    for i in range(2, last_page + 1):
        url_list.append("https://www.52shuku8.com/wenxue/1115/16419_%d.html" % i)
    return url_list

def ParseDetailPage(page):
    '''Extract the chapter body from a page: the text of every <p> tag,
    joined with newlines, in document order.'''
    paragraphs = BeautifulSoup(page, 'html.parser').find_all("p")
    lines = []
    for tag in paragraphs:
        lines.append(tag.get_text())
    return "\n".join(lines)

def WriteDataToFile(file_path, data):
    '''Append `data` to the file at `file_path`, creating it if absent.

    Fix: the file was opened without a `with` block, so an exception during
    write would leak the handle; the context manager guarantees it closes.
    '''
    with open(file_path, 'a+') as f:
        f.write(data)

def Test():
    url = "https://www.52shuku8.com/wenxue/1115/16419_2.html"
    page = OpenPage(url)
    # print page
    result = ParseDetailPage(page)
    print str(result)

# Test()
# Guard the crawl behind the standard entry-point check so that importing
# this module (e.g. to reuse ParseDetailPage) does not kick off a full crawl.
if __name__ == '__main__':
    Main()
